# NNODES=$WORLD_SIZE \
# NODE_RANK=$RANK \

# nproc_per_node=3 
# NPROC_PER_NODE=$nproc_per_node \
# CUDA_VISIBLE_DEVICES=5,6,7 \
# MASTER_PORT=9700 \

# NNODES=1 \
# NODE_RANK="$1" \
# MASTER_ADDR="10.210.21.105" \
# MASTER_PORT=9700 \
# NPROC_PER_NODE=3 \
# CUDA_VISIBLE_DEVICES=5,6,7 \

# Run `swift sft` as a single process (NPROC_PER_NODE=1) pinned to GPU index 6,
# starting from the VLM-FO1 checkpoint given by --model.
#
# The option list is assembled in the function's positional parameters rather
# than in one long backslash-continued command, so that each group of flags can
# carry its own comment. Positional parameters set inside a function are scoped
# to that call, so the script's own "$@" is left untouched.
run_sft() {
    # Checkpoint to start from, plus the model/template identifiers.
    set -- \
        --model "/data2/wushengyu/model/VLM-FO1_Qwen2.5-VL-3B-v01" \
        --model_type vlm_fo1 \
        --template vlm_fo1

    # Training mode, dataset selection and compute dtype.
    set -- "$@" \
        --train_type full \
        --dataset 'humanref_sft' \
        --split_dataset_ratio 0 \
        --load_from_cache_file true \
        --torch_dtype bfloat16

    # Of the three freeze switches, only the aligner is left unfrozen.
    set -- "$@" \
        --freeze_vit true \
        --freeze_llm true \
        --freeze_aligner false

    # Optimisation schedule.
    set -- "$@" \
        --num_train_epochs 2 \
        --per_device_train_batch_size 1 \
        --learning_rate 1e-5 \
        --gradient_accumulation_steps 1

    # Evaluation / checkpoint / logging cadence.
    # NOTE(review): --split_dataset_ratio is 0 and no validation dataset is
    # passed here, so --eval_steps may have no effect -- confirm.
    set -- "$@" \
        --eval_steps 10 \
        --save_steps 10 \
        --save_total_limit 2 \
        --logging_steps 1

    # Sequence length cap, checkpoint destination, system prompt and warmup.
    # NOTE(review): the output directory name mentions "llava-v1.6-mistral-7b"
    # although --model points at a VLM-FO1 Qwen2.5-VL-3B checkpoint; presumably
    # a leftover from an earlier script -- confirm before renaming, since other
    # tooling may read from this path.
    set -- "$@" \
        --max_length 8192 \
        --output_dir "/data2/wushengyu/model/3_trained_model/swift_model/llava-v1.6-mistral-7b_sft" \
        --system 'You are a helpful assistant.' \
        --warmup_ratio 0.05

    # Data-loading parallelism, attention implementation and DeepSpeed config.
    set -- "$@" \
        --dataloader_num_workers 1 \
        --dataset_num_proc 1 \
        --attn_impl flash_attention_2 \
        --deepspeed zero2

    # The assignments are command prefixes, so they are placed in the
    # environment of this one `swift` process only.
    PYTORCH_CUDA_ALLOC_CONF='expandable_segments:True' \
    NPROC_PER_NODE=1 \
    CUDA_VISIBLE_DEVICES=6 \
        swift sft "$@"
}

# The function's status is that of `swift`, so the script's exit code is
# the same as when the command is invoked directly.
run_sft
